***************
* This script generates interactions between all candidate characteristics visible to recruiters
* Author: Daniel Kopp
***************

* --- Session setup ---
* Start from an empty session: no data, no Mata objects, no matrices
clear
clear mata
clear matrix

* Never pause the output for --more--
set more off

* Raise the variable limit; the interaction terms generated further down
* add a large number of variables to the dataset
set maxvar 11000

* Load the raw data file
use "data_raw\jobroom_data_full_201703_201712.dta"

****************************
* Sample restriction: we drop candidates that have never been in a search, and
* searches for which no search criterion at all has been specified
****************************

* Candidates that have never been in a search (missing search_tag)
drop if search_tag==.

* Search criteria that count as "specified" when they are not system-missing.
* NOTE(review): s_oral2 and s_lang5 are not in this list although they are dropped
* together with the other s_lang/s_oral/s_written variables below - confirm that
* the omission is intentional.
* NOTE(review): the comparison with . assumes all listed variables are numeric.
local criteria ///
    s_job_nr s_job_name s_skills_notes s_workplace_grossregion ///
    s_candidate_residence s_workload s_education s_diploma s_experience ///
    s_drivers_license s_special_type_of_work s_candidate_availability ///
    s_lang1 s_oral1 s_written1 s_lang2 s_written2 s_lang3 s_oral3 s_written3 ///
    s_lang4 s_oral4 s_written4 s_oral5 s_written5 ///
    s_bn2000_5 s_bn2000_3 s_bn2000_2 s_q93_beruf

* Row-level flag: 1 as soon as any criterion is filled in on that row
generate s_specified_temp = 0
foreach crit of varlist `criteria' {
	replace s_specified_temp = 1 if `crit' != .
}

* Search-level flag: maximum of the row-level flag within search_tag
bysort search_tag: gegen s_specified = max(s_specified_temp)
tab s_specified

* Drop all searches where no criterion has been specified, then clean up
drop if s_specified==0
drop s_specified_temp s_specified
	
* The search-language criteria (s_lang, s_oral, s_written; slots 1 to 5) are not needed any more
foreach slot of numlist 1/5 {
	drop s_lang`slot' s_oral`slot' s_written`slot'
}

* Neither are the cantons in which candidates are searching for work (ar_*);
* the aggregate variable mobility_no_cantons is kept instead
drop ar_*
	
* Transform categorical variables into dummies.
*
* The two lists below are parallel: the k-th entry of cat_stubs is the name stub
* used for the dummies of the k-th entry of cat_sources. For every non-missing
* level of a source variable one byte indicator named stub+level is generated
* (1 if the source equals that level, 0 otherwise, including when the source is
* missing) and labelled with the value label text of that level.
* Note that the stub prof_length_ deliberately ends with an underscore.
local cat_sources ///
    mobility_no_cantons_cat ///
    limited_contract ///
    cod_erfa_searched ///
    beruf_abschluss_searched ///
    fuehrerausweis_kat_short ///
    kanton ///
    education_short ///
    profile_length_cat ///
    skill_exp_duration_cat_searched ///
    workvolume_cat ///
    nation_kat ///
    name_origin_1_kat ///
    lang_region ///
    verfuegbar

local cat_stubs ///
    mobility_cat ///
    limcontract ///
    erfa_s ///
    abschluss_s ///
    fuehrer_kat ///
    kanton ///
    educ_short ///
    prof_length_ ///
    sk_exp_dur_c ///
    workvolume_c ///
    nation_kat ///
    name_kat ///
    lang_region ///
    verfuegbar

local n_cat : word count `cat_sources'
forvalues k = 1/`n_cat' {
	local src  : word `k' of `cat_sources'
	local stub : word `k' of `cat_stubs'

	* Name of the value label attached to the source variable
	local val_label : value label `src'
	flevelsof `src', local(levels)
	foreach i of local levels {
		gen byte `stub'`i' = `src'==`i'
		local lab_`i' : label `val_label' `i'
		label var `stub'`i' " `lab_`i'' "
	}
}

* Dummies for bn2000_1_searched (the bn2000_1_s* variables listed in occup_dummy).
* System-missing values are first recoded as 9 (unclassified), otherwise we lose
* too many observations.
recode bn2000_1_searched (.=9)
local val_label : value label bn2000_1_searched
flevelsof bn2000_1_searched, local(levels)
foreach i of local levels {
	* After the recode above no observation is system-missing any more, so the
	* dummies never have to be reset to missing here.
	gen byte bn2000_1_s`i' = bn2000_1_searched==`i'
	local lab_`i' : label `val_label' `i'
	label var bn2000_1_s`i' " `lab_`i'' "
}

******************************
* Separate indicator categories for missing values
******************************

* Parallel lists: the k-th flag is 1 where the k-th source variable is system-missing.
* kanton and lang_region have no missings, hence no kanton_m / lang_region_m.
local flag_sources ///
    mobility_no_cantons_cat cod_erfa_searched beruf_abschluss_searched ///
    fuehrerausweis_kat_short skill_exp_duration_cat_searched workvolume_cat ///
    nation_kat name_origin_1_kat
local flag_names ///
    mobility_cat_m erfa_s_m abschluss_s_m ///
    fuehrer_kat_m sk_exp_dur_c_m workvolume_c_m ///
    nation_kat_m name_kat_m

local n_flags : word count `flag_names'
forvalues k = 1/`n_flags' {
	local src  : word `k' of `flag_sources'
	local flag : word `k' of `flag_names'
	generate byte `flag' = (`src' == .)
}

* no_diff_prof: remember where it was missing, then set those values to zero
generate byte no_diff_prof_m = (no_diff_prof == .)
replace no_diff_prof = 0 if no_diff_prof_m == 1

* l_missing flags observations without language information.
* If l_German is missing, all other l_X variables are missing as well, so
* l_German alone is used to build the flag.
generate l_missing = (l_German == .)

* Full list of language dummies, including the new flag
global lang_dummy_full "l_German l_CH_German l_English l_French  l_Italian l_northwest l_southeuro l_centreast l_Balkan l_middleeast l_Asia l_Other l_missing"

* Set system-missing language dummies to zero
foreach langvar of varlist $lang_dummy_full {
	replace `langvar' = 0 if `langvar' == .
}

*********
* Rename variables
*********

* The original names are too long to be combined into interaction names.
* First strip the common prefix and suffix: skill_X_searched becomes sk_X.
rename skill_*_searched sk_*

* Then shorten the remaining long names in one group rename
* (the k-th old name is renamed to the k-th new name).
* There are no obs with the skill sk_lrumansh, hence we do not interact it.
rename ///
    (sk_experience sk_educ_tertiary sk_educ_weiterbild sk_education_lehre ///
     sk_l_middleeast_turk sk_l_southern_europe sk_l_centr_east_euro sk_l_other_countries ///
     sk_l_german sk_l_french sk_l_italian sk_l_rumansh ///
     sk_l_english sk_l_balkan sk_l_asia sk_l_ch_german ///
     sk_detected_language sk_it_skills_general sk_it_skills_deep sk_est_n_words) ///
    (sk_exp sk_educ_tert sk_educ_weiter sk_educ_lehre ///
     sk_lmiddleeast sk_lsoutheuro sk_lcentreast sk_lother ///
     sk_lgerman sk_lfrench sk_litalian sk_lrumansh ///
     sk_lenglish sk_lbalkan sk_lasia sk_lchgerman ///
     sk_det_lang sk_it_gen sk_it_deep sk_est_n_word)

* One byte dummy per non-missing level of sk_det_lang, labelled with the
* value label text of that level
local detlang_lbl : value label sk_det_lang
flevelsof sk_det_lang, local(detlang_levels)
foreach lvl of local detlang_levels {
	local lvl_text : label `detlang_lbl' `lvl'
	generate byte sk_det_lang`lvl' = (sk_det_lang == `lvl')
	label variable sk_det_lang`lvl' " `lvl_text' "
}

* Quick check of the dummy for level 1, then the separate missing category
tab sk_det_lang1, m
generate byte sk_det_lang_m = (sk_det_lang == .)

* Short-named copies of two fyn_ variables
clonevar zuletzt_s     = fyn_zuletzt_searched
clonevar gesch_kontakt = fyn_geschuetzt_kontaktangaben

* Copy every af_* variable; the copies first get a clone prefix and are then
* renamed in one go so that both the clone prefix and the af_ prefix are gone
foreach afvar of varlist af_* {
	generate clone`afvar' = `afvar'
}
rename cloneaf_* *

* Shorter names for use in interaction names
rename Sonn_Feiertagsarbeit Sonn_Feiertag
rename num_vermittlungs_grad workvolume

* Short-named copies of the two counters
clonevar no_cand_click = no_cand_clicked_before_search
clonevar no_searchvar  = no_searched_var

label variable geschlecht "Gender"
label variable workvolume "Workvolume"

****
* Gen globals for variables that we want to interact (only candidate characteristics)
* Each global is a space-separated list of variable names; the lists are combined
* further down to build the interaction terms.
****

* All skill dummies, including the dummies for all skill languages and the missing categories
global skills_dummy_full "has_skills sk_det_lang0 sk_det_lang1 sk_det_lang2 sk_det_lang3 sk_det_lang4 sk_det_lang_m sk_exp  sk_education   sk_educ_tert  sk_educ_lehre  sk_educ_weiter sk_softskills sk_it_gen sk_it_deep sk_machines sk_leadership sk_language sk_lgerman sk_lfrench sk_litalian sk_lsoutheuro sk_lother sk_lenglish sk_lmiddleeast sk_lbalkan sk_lcentreast sk_lasia sk_lchgerman sk_exp_dur_c0 sk_exp_dur_c1 sk_exp_dur_c2 sk_exp_dur_c3 sk_exp_dur_c4 sk_exp_dur_c_m" 

* Without language skills, except the languages that are spoken in Switzerland plus English (sk_lgerman sk_lfrench sk_lenglish sk_litalian sk_lchgerman); also without the _m missing categories
global skills_dummy "has_skills sk_det_lang0 sk_det_lang1 sk_det_lang2 sk_det_lang3 sk_det_lang4 sk_exp  sk_education   sk_educ_tert  sk_educ_lehre  sk_educ_weiter sk_softskills sk_it_gen sk_it_deep sk_machines sk_leadership sk_language sk_lgerman sk_lfrench sk_litalian sk_lenglish sk_lchgerman sk_exp_dur_c0 sk_exp_dur_c1 sk_exp_dur_c2 sk_exp_dur_c3 sk_exp_dur_c4" 

* All language dummies (same list as the one defined when l_missing is generated)
global lang_dummy_full "l_German l_CH_German l_English l_French  l_Italian l_northwest l_southeuro l_centreast l_Balkan l_middleeast l_Asia l_Other l_missing"

* Only languages that are spoken in Switzerland plus English
global lang_dummy "l_German l_CH_German l_English l_French  l_Italian l_missing"

* Remaining dummies; the numeric suffixes are the levels of the underlying categorical variables
global other_dummy "workvolume_c1 workvolume_c2 workvolume_c3 workvolume_c_m erfa_s0 erfa_s1 erfa_s2 erfa_s3 erfa_s_m mobility_cat0 mobility_cat1 mobility_cat2 mobility_cat3 mobility_cat4 mobility_cat_m limcontract0 limcontract1 limcontract99 verfuegbar0 verfuegbar1 verfuegbar99 no_searchvar no_diff_prof_m zuletzt_s  abschluss_s0 abschluss_s1 abschluss_s2 abschluss_s3 abschluss_s_m gesch_kontakt Sonn_Feiertag Schichtarbeit Nachtarbeit Heimarbeit Lehre fuehrer_kat0 fuehrer_kat1 fuehrer_kat2 fuehrer_kat3 fuehrer_kat4 fuehrer_kat5 fuehrer_kat6 fuehrer_kat_m educ_short0 educ_short1 educ_short2 educ_short99  prof_length_1 prof_length_2 prof_length_3 prof_length_4 prof_length_99 user_logged_in"
* Nationality and name-origin dummies (used for the ethnicity interactions)
global nation_name "nation_kat0 nation_kat1 nation_kat2 nation_kat3  nation_kat4 nation_kat5 nation_kat6 nation_kat8 nation_kat9 name_kat0 name_kat1 name_kat2 name_kat3 name_kat4 name_kat5 name_kat6 name_kat7 name_kat8 nation_kat_m name_kat_m"
global geschlecht "geschlecht"
* NOTE(review): the kanton global is not used in any of the interaction lists in this script - confirm whether it is needed elsewhere
global kanton "kanton1 kanton2 kanton3 kanton4 kanton5 kanton6 kanton7 kanton8 kanton9 kanton10 kanton11 kanton12 kanton13 kanton14 kanton15 kanton16 kanton17 kanton18 kanton19 kanton20 kanton21 kanton22 kanton23 kanton24 kanton25 kanton26 "
global lang_region "lang_region1 lang_region2 lang_region3 lang_region4"
* Continuous variables; their squares are generated below
global other_contin " sk_est_n_word no_diff_prof prof_exp_tot"
* Dummies generated from bn2000_1_searched
global occup_dummy "bn2000_1_s1 bn2000_1_s2 bn2000_1_s3 bn2000_1_s4 bn2000_1_s5 bn2000_1_s6 bn2000_1_s7 bn2000_1_s8 bn2000_1_s9"

* Store all dummies as byte. recast works through the list variable by variable
* and leaves a variable unchanged if values would be altered by the conversion.
recast byte ///
    $skills_dummy_full ///
    $lang_dummy_full ///
    $other_dummy ///
    $nation_name ///
    $geschlecht ///
    $lang_region

* Gen square of continuous variables.
* For every variable x in other_contin a variable x_2 = x^2 is generated and its
* name is collected in the global squared.
* The squares are stored as long: an int holds at most 32,740, so with int the
* square of any value above 180 would silently be stored as missing. The compress
* at the end of the script shrinks the storage type again where possible.
local squared
foreach x of global other_contin {
	gen long `x'_2 = `x'^2
	local squared `squared' `x'_2
}
global squared "`squared'"

* Show missing values: print one line for every listed variable that still has missings
local to_check $other_dummy $nation_name $geschlecht $lang_dummy_full $skills_dummy_full  $other_contin $squared $lang_region $occup_dummy
foreach chk of varlist `to_check' {
	quietly count if missing(`chk')
	if r(N) > 0 {
		display " `chk' has	 `r(N)' missings"		// -> THERE ARE NO MISSING VALUE!
	}
}


********************************************
* Global with all interactions among the general variables (without gender and
* nationality/name/exotic languages, since those interactions are created below)
********************************************

* First step: all pairwise products of the dummies.
* In every round the first name is taken off the list and multiplied with each
* of the names that remain, so every unordered pair is generated exactly once.
local pool "$other_dummy $lang_dummy $skills_dummy $occup_dummy $lang_region "
local interactvar
local remaining `pool'
local n_pool : word count `pool'
forvalues round = 1/`n_pool' {
	gettoken left remaining : remaining
	foreach right of local remaining {
		generate byte `left'X`right' = `left'*`right'
		local pairname "`left'X`right'"
		local interactvar : list interactvar | pairname
	}
}
global interactvar " `interactvar' "

* Second step: every dummy times every continuous variable (levels and squares).
* The new names are appended to the list collected so far in the global interactvar.
* NOTE(review): the products are stored as int (max 32,740); a larger product
* would be stored as missing - confirm the value range of the continuous variables.
local dummies " $other_dummy  $lang_dummy $skills_dummy $occup_dummy $lang_region "
local contin $other_contin  $squared
local interactvar  $interactvar
foreach d of local dummies {
	foreach c of local contin {
		generate int `d'X`c' = `d'*`c'
		local pairname "`d'X`c'"
		local interactvar : list interactvar | pairname
	}
}
global interactvar " `interactvar' "

* Third step: all pairwise products among the continuous variables and their
* squares, stored as long and appended to the global interactvar.
* As in the first step, the first name is taken off the list in every round and
* multiplied with each of the names that remain.
local pool "$other_contin  $squared  "
local interactvar $interactvar
local remaining `pool'
local n_pool : word count `pool'
forvalues round = 1/`n_pool' {
	gettoken left remaining : remaining
	foreach right of local remaining {
		generate long `left'X`right' = `left'*`right'
		local pairname "`left'X`right'"
		local interactvar : list interactvar | pairname
	}
}
global interactvar " `interactvar' "

********************************************
* Interactions with gender
********************************************

* First step: geschlecht times every dummy; the new variable names are
* collected in the global interactgender.
local partners "$other_dummy $lang_dummy $skills_dummy $lang_region $occup_dummy "
local interactgender
foreach p of local partners {
	generate byte geschlechtX`p' = geschlecht*`p'
	local pairname "geschlechtX`p'"
	local interactgender : list interactgender | pairname
}
global interactgender " `interactgender' "

* Second step: geschlecht times every continuous variable (levels and squares),
* appended to the global interactgender.
* NOTE(review): the products are stored as int (max 32,740); a larger product
* would be stored as missing - confirm the value range of the continuous variables.
local partners " $other_contin  $squared  "
local interactgender  $interactgender
foreach p of local partners {
	generate int geschlechtX`p' = geschlecht*`p'
	local pairname "geschlechtX`p'"
	local interactgender : list interactgender | pairname
}
global interactgender " `interactgender' "
display "$interactgender"


********************************************
* Gen interactions with ethnicity
********************************************

* Ethnicity interaction dummies: every nationality / name-origin dummy times
* every other dummy (here we use lang_dummy_full and skills_dummy_full).
* The nationality / name-origin dummies are read from the global nation_name
* instead of a second hard-coded copy of the same list, so that the two lists
* cannot drift apart.
local interactethnicity ""
local dummies "$other_dummy $lang_dummy_full $skills_dummy_full $lang_region $occup_dummy"
foreach nat of global nation_name {
	foreach d of local dummies {
		gen byte `nat'X`d' = `nat'*`d'
		local varname "`nat'X`d'"
		local interactethnicity : list interactethnicity | varname
	}
}
global interactethnicity "`interactethnicity'"

* Add the interactions of the nationality / name-origin dummies with the
* continuous variables (levels and squares) to the global interactethnicity.
* The dummies are read from the global nation_name instead of another hard-coded
* copy of the same list, so that the lists cannot drift apart.
* NOTE(review): the products are stored as int (max 32,740); a larger product
* would be stored as missing - confirm the value range of the continuous variables.
local interactethnicity "$interactethnicity"
local continvar " $other_contin  $squared  "
foreach nat of global nation_name {
	foreach c of local continvar {
		gen int `nat'X`c' = `nat'*`c'
		local varname "`nat'X`c'"
		local interactethnicity : list interactethnicity | varname
	}
}
global interactethnicity "`interactethnicity'"
di "$interactethnicity"

* Drop interaction variables with constant values.
* A variable counts as constant when its minimum equals its maximum; this is
* also the case when all of its values are missing.
local constvars
foreach var of varlist $interactvar $interactgender $interactethnicity {
	* meanonly skips the variance computation; r(min) and r(max) are still returned
	quietly summarize `var', meanonly
	if r(min)==r(max) {
		local constvars `constvars' `var'
	}
}
global drop " `constvars' "
di " $drop "

* drop without a variable list is an error, so only drop if something was found
if "`constvars'" != "" {
	drop `constvars'
}

* Remove the dropped variables from the three interaction globals
foreach grp in interactvar interactgender interactethnicity {
	local members ${`grp'}
	local members : list members - constvars
	global `grp' `members'
}

  
* Help files: for each of the three interaction lists, export the variable
* names (header row) together with the first observation to an Excel file.
* The data in memory are restored after every export.

* General interactions
preserve
keep if _n == 1
keep $interactvar
export excel "Help_files\interactvar.xlsx", firstrow(variables) replace
restore

* Gender interactions
preserve
keep if _n == 1
keep $interactgender
export excel "Help_files\interactgender.xlsx", firstrow(variables) replace
restore

* Ethnicity interactions
preserve
keep if _n == 1
keep $interactethnicity
export excel "Help_files\interactethnicity.xlsx", firstrow(variables) replace
restore

* Remove the value labels from these two variables
* NOTE(review): _strip_labels is an undocumented Stata command - confirm its behaviour in the Stata version used
_strip_labels q93_beruf_searched
_strip_labels s_bn2000_1

* Reduce every variable to the smallest storage type that holds its values
compress	

* Save the processed dataset, overwriting an existing file
save "data_processed\with_interactions_compr_all_full.dta", replace
